require(dplyr)
require(MCMCpack)
require(stringi)

#### combine datasets ####
pref.names <- c("北海道", "青森県", "岩手県", "宮城県", "秋田県", 
                "山形県", "福島県", "茨城県", "栃木県", "群馬県", 
                "埼玉県", "千葉県", "東京都", "神奈川県", "新潟県", 
                "富山県", "石川県", "福井県", "山梨県", "長野県", 
                "岐阜県", "静岡県", "愛知県", "三重県", "滋賀県", 
                "京都府", "大阪府", "兵庫県", "奈良県", "和歌山県", 
                "鳥取県", "島根県", "岡山県", "広島県", "山口県", 
                "徳島県", "香川県", "愛媛県", "高知県", "福岡県", 
                "佐賀県", "長崎県", "熊本県", "大分県", "宮崎県", 
                "鹿児島県", "沖縄県")

# mapping table of issues and variable names
issues <- read.csv("UTAS_issue_items.csv")

## 2003 survey
ut.03 <- read.csv("ates2v1.csv", na.strings = c("."), fileEncoding = "CP932")

round(mean(ut.03$response[ut.03$won != 4]), 3)  # number of respondents
round(mean(ut.03$response[ut.03$won != 4 & ut.03$partycod == 1]), 3)  # response rate

ut.03$won[ut.03$name == "木村隆秀"] <- 1  # kuriage
ut.03$won[ut.03$name == "山崎拓"] <- 1  # by-election

issues.03 <- subset(issues, ut.03 != "")
party.03 <- c("LDP", "DPJ", "CGP", "JCP", "SDP", "NCP", "other", "other", "other", "indep")
data.03 <- data.frame(name = ut.03$name, party = party.03[ut.03$partycod], 
                      tier = ifelse(ut.03$won == 1, "SMD", 
                                    ifelse(ut.03$won == 3, "zombie", "PR")), 
                      year = 2003, 
                      pref = ifelse(ut.03$prefectu > 47, NA, 
                                    pref.names[ut.03$prefectu]), 
                      dist = ifelse(ut.03$prefdist == 99, NA, ut.03$prefdist), 
                      term = NA, 
                      stringsAsFactors = FALSE)

data.03$name[data.03$name == "荒井聡"] <- "荒井聰"  # correct character inconsistency

# arrange variable names and coding
for (i in 1:nrow(issues.03)) {
  eval(parse(text = paste0("x <- ut.03$", issues.03$ut.03[i])))
  x <- ifelse(x == 1 | x == 2 | x == 3 | x == 4 | x == 5, x, NA)
  eval(parse(text = paste0("data.03$", issues.03$varname[i], " <- x")))
}

# discard losing candidates
data.03 <- subset(data.03, ut.03$won != 4)

## 2005 survey
ut.05 <- read.csv("2005ates0801171.csv", fileEncoding = "CP932")

round(mean((2 - ut.05$response)[ut.05$won != 4]), 3)  # number of respondents
round(mean((2 - ut.05$response)[ut.05$won != 4 & ut.05$partycod == 1]), 3)  # response rate

ut.05$won[ut.05$familyna == "大高"] <- 2  # kuriage
ut.05$won[ut.05$familyna == "泉原"] <- 2  # kuriage

issues.05 <- subset(issues, ut.05 != "")
party.05 <- c("LDP", "DPJ", "CGP", "JCP", "SDP", "PNP", "NPN", "other", "indep")
data.05 <- data.frame(name = paste(ut.05$familyna, ut.05$firstnam, sep = ""), 
                      party = party.05[ut.05$partycod], 
                      tier = ifelse(ut.05$won == 1, "SMD", 
                                    ifelse(ut.05$won == 3, "zombie", "PR")), 
                      year = 2005, 
                      pref = pref.names[ut.05$prefectu], 
                      dist = ut.05$prefdist, 
                      term = NA, 
                      stringsAsFactors = FALSE)

data.05$name[data.05$name == "西野あきら"] <- "西野陽"  # correct character inconsistency
data.05$name[data.05$name == "山中〓子"] <- "山中あき子"

# arrange variable names and coding
for (i in 1:nrow(issues.05)) {
  eval(parse(text = paste0("x <- ut.05$", issues.05$ut.05[i])))
  x <- ifelse(x == 1 | x == 2 | x == 3 | x == 4 | x == 5, x, NA)
  eval(parse(text = paste0("data.05$", issues.05$varname[i], " <- x")))
}

# discard losing candidates
data.05 <- subset(data.05, ut.05$won != 4)

## 2009 survey
ut.09 <- read.csv("2009UTASP20150910.csv", fileEncoding = "CP932")

round(mean(ut.09$RESPONSE[ut.09$RESULT != 4]), 3)  # number of respondents
round(mean(ut.09$RESPONSE[ut.09$RESULT != 4 & ut.09$PARTY == 1]), 3)  # response rate

ut.09$RESULT[ut.09$NAME == "今津＝寛        "] <- 3  # kuriage
ut.09$RESULT[ut.09$NAME == "望月＝義夫      "] <- 3  # kuriage
ut.09$RESULT[ut.09$NAME == "丹羽＝秀樹      "] <- 1  # by-election
ut.09$RESULT[ut.09$NAME == "宮路＝和明      "] <- 1  # by-election

issues.09 <- subset(issues, ut.09 != "")
party.09 <- c("LDP", "DPJ", "CGP", "JCP", "SDP", "PNP", "YP", "NPN", "other", "other", "other", "indep")
data.09 <- data.frame(name = gsub(" ", "", gsub("＝", "", ut.09$NAME)), 
                      party = party.09[ut.09$PARTY], 
                      tier = ifelse(ut.09$RESULT == 1, "SMD", 
                                    ifelse(ut.09$RESULT == 3, "zombie", "PR")), 
                      year = 2009, 
                      pref = pref.names[ut.09$PREFEC], 
                      dist = ut.09$DISTRICT, 
                      term = ut.09$TERM, 
                      stringsAsFactors = FALSE)

# arrange variable names and coding
for (i in 1:nrow(issues.09)) {
  eval(parse(text = paste0("x <- ut.09$", issues.09$ut.09[i])))
  if (issues.09$sign.09[i] == 1) {
    x <- ifelse(x == 1 | x == 2 | x == 3 | x == 4 | x == 5, 6 - x, NA)
  } else {
    x <- ifelse(x == 1 | x == 2 | x == 3 | x == 4 | x == 5, x, NA)
  }
  eval(parse(text = paste0("data.09$", issues.09$varname[i], " <- x")))
}

# discard losing candidates
data.09 <- subset(data.09, ut.09$RESULT != 4)  

## 2012 survey
ut.12 <- read.csv("2012UTASP20150910.csv", fileEncoding = "CP932")

round(mean(ut.12$RESPONSE[ut.12$RESULT != 0]), 3)  # number of respondents
round(mean(ut.12$RESPONSE[ut.12$RESULT != 0 & ut.12$PARTY == 2]), 3)  # response rate

issues.12 <- subset(issues, ut.12 != "")
party.12 <- c("DPJ", "LDP", "TPJ", "CGP", "JRP", "JCP", "YP", "SDP", "other", "PNP", "NPN", "other", "indep")
data.12 <- data.frame(name = gsub("＝", "", ut.12$NAME), 
                      party = party.12[ut.12$PARTY], 
                      tier = ifelse(ut.12$RESULT == 1, "SMD", 
                                    ifelse(ut.12$RESULT == 2, "zombie", "PR")), 
                      year = 2012, 
                      pref = ifelse(ut.12$PREFEC == 66, NA, 
                                    pref.names[ut.12$PREFEC]), 
                      dist = ifelse(ut.12$DISTRICT == 66, NA, ut.12$DISTRICT), 
                      term = NA, 
                      stringsAsFactors = FALSE)

# arrange variable names and coding
for (i in 1:nrow(issues.12)) {
  eval(parse(text = paste0("x <- ut.12$", issues.12$ut.12[i])))
  x <- ifelse(x == 1 | x == 2 | x == 3 | x == 4 | x == 5, x, NA)
  eval(parse(text = paste0("data.12$", issues.12$varname[i], " <- x")))
}

# discard losing candidates
data.12 <- subset(data.12, ut.12$RESULT != 0)

## 2014 survey
ut.14 <- read.csv("2014UTASP20150910.csv", fileEncoding = "CP932")

round(mean(ut.14$RESPONSE[ut.14$RESULT != 0]), 3)  # number of respondents
round(mean(ut.14$RESPONSE[ut.14$RESULT != 0 & ut.14$PARTY == 1]), 3)  # response rate

ut.14$RESULT[ut.14$NAME == "田畑＝毅"] <- 3  # kuriage

issues.14 <- subset(issues, ut.14 != "")
party.14 <- c("LDP", "DPJ", "JIP", "CGP", "PFG", "JCP", "PLP", "SDP", "other", "other", "other", "other", "indep")
data.14 <- data.frame(name = gsub("＝", "", ut.14$NAME), 
                      party = party.14[ut.14$PARTY], 
                      tier = ifelse(ut.14$RESULT == 1, "SMD", 
                                    ifelse(ut.14$RESULT == 2, "zombie", "PR")), 
                      year = 2014, 
                      pref = ifelse(ut.14$PREFEC == 66, NA, 
                                    pref.names[ut.14$PREFEC]), 
                      dist = ifelse(ut.14$DISTRICT == 66, NA, ut.14$DISTRICT), 
                      term = NA, 
                      stringsAsFactors = FALSE)

data.14$name[data.14$name == "金子恵美" & data.14$party == "LDP"] <- "金子めぐみ"  # distinguish between MPs with the same name

# discard losing candidates
for (i in 1:nrow(issues.14)) {
  eval(parse(text = paste0("x <- ut.14$", issues.14$ut.14[i])))
  x <- ifelse(x == 1 | x == 2 | x == 3 | x == 4 | x == 5, x, NA)
  eval(parse(text = paste0("data.14$", issues.14$varname[i], " <- x")))
}

# discard losing candidates
data.14 <- subset(data.14, ut.14$RESULT != 0)

## merged data
merged.data <- bind_rows(data.03, data.05, data.09, data.12, data.14)

#### IRT estimation ####
IRT.results <- list()
IRT.results[[1]] <- MCMCordfactanal(~., factors = 2, 
                                    lambda.constraints = list(Q1 = list(2, "-"), Q1 = list(3, 0), 
                                                              Q2 = list(2, "-"), Q2 = list(3, 0), 
                                                              Q11 = list(2, 0), Q11 = list(3, "+"), 
                                                              Q12 = list(2, 0), Q12 = list(3, "+")), 
                                    data = merged.data[, -1 * c(1:7)], 
                                    burnin = 1000, mcmc = 100000, thin = 50, L0 = 0.25, 
                                    tune = 0.1, seed = 11111, store.scores = TRUE)
IRT.results[[2]] <- MCMCordfactanal(~., factors = 2, 
                                    lambda.constraints = list(Q1 = list(2, "-"), Q1 = list(3, 0), 
                                                              Q2 = list(2, "-"), Q2 = list(3, 0), 
                                                              Q11 = list(2, 0), Q11 = list(3, "+"), 
                                                              Q12 = list(2, 0), Q12 = list(3, "+")), 
                                    data = merged.data[, -1 * c(1:7)], 
                                    burnin = 1000, mcmc = 100000, thin = 50, L0 = 0.25, 
                                    tune = 0.1, seed = 22222, store.scores = TRUE)
IRT.results[[3]] <- MCMCordfactanal(~., factors = 2, 
                                    lambda.constraints = list(Q1 = list(2, "-"), Q1 = list(3, 0), 
                                                              Q2 = list(2, "-"), Q2 = list(3, 0), 
                                                              Q11 = list(2, 0), Q11 = list(3, "+"), 
                                                              Q12 = list(2, 0), Q12 = list(3, "+")), 
                                    data = merged.data[, -1 * c(1:7)], 
                                    burnin = 1000, mcmc = 100000, thin = 50, L0 = 0.25, 
                                    tune = 0.1, seed = 33333, store.scores = TRUE)
# save(IRT.results, file = "IRT_results.Rdata")
# load("IRT_results.Rdata")

IRT.results.list <- mcmc.list(IRT.results[[1]], IRT.results[[2]], IRT.results[[3]])
# combine posterior draws
parameter.draw <- rbind(as.matrix(IRT.results[[1]]), as.matrix(IRT.results[[2]]), 
                        as.matrix(IRT.results[[3]]))

#### discrimination parameters ####
discrimination.1st <- append(colMeans(parameter.draw[, stri_detect_regex(colnames(parameter.draw), "Lambda.+\\.2")]), 
                             c(0, 0), 10)
discrimination.2nd <- c(0, 0, colMeans(parameter.draw[, stri_detect_regex(colnames(parameter.draw), "Lambda.+\\.3")]))
discrimination.table <- cbind(discrimination.1st, discrimination.2nd)
rownames(discrimination.table) <- issues$issue[1:68]
round(discrimination.table[order(abs(discrimination.table[, 1]), decreasing = TRUE), ], 2)

#### output ####
# ideal point estimates
merged.data$theta.1 <- colMeans(parameter.draw[, stri_detect_regex(colnames(parameter.draw), "phi\\..+\\.2")])
merged.data$theta.2 <- colMeans(parameter.draw[, stri_detect_regex(colnames(parameter.draw), "phi\\..+\\.3")])
# insert missing values to those of MPs who did not answer any issue items
merged.data$theta.1[rowSums(! is.na(merged.data[, -1 * c(1:7, 76, 77)])) == 0] <- NA
merged.data$theta.2[rowSums(! is.na(merged.data[, -1 * c(1:7, 76, 77)])) == 0] <- NA

ideal.point.data <- data.frame(name = merged.data$name, elec.term = merged.data$year, 
                               party = merged.data$party, tier = merged.data$tier, 
                               pref = merged.data$pref, dist = merged.data$dist, 
                               term = merged.data$term, 
                               theta.1 = merged.data$theta.1, 
                               theta.2 = merged.data$theta.2)
ideal.point.data$elec.term[ideal.point.data$elec.term == 2003] <- 43
ideal.point.data$elec.term[ideal.point.data$elec.term == 2005] <- 44
ideal.point.data$elec.term[ideal.point.data$elec.term == 2009] <- 45
ideal.point.data$elec.term[ideal.point.data$elec.term == 2012] <- 46
ideal.point.data$elec.term[ideal.point.data$elec.term == 2014] <- 47

# write.csv(ideal.point.data, file = "ideal_point_data.csv", row.names = FALSE)